---
title: "NDEI-Replication"
author: "Leah Matchett"
date: "1/3/2023"
output: html_document
---

Load Data
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)


library(tidyr)
library(dplyr)
library(data.table)
library(asciiSetupReader)
library(naniar)
library(sjPlot)
library(stargazer)
library(ggplot2)
library(ggthemes)
library(lfe)

load("C:/Users/lmatc/Dropbox/Dissertation Share Materials/Content Ch 1-NDEI/IndexConstruction_Replication/EA.Rdata")
#load("EA.Rdata") # This must first be created by IndexConstruction.Rmdc

setwd("Data") #windows
housecontracts = fread("house_milcontracts.csv")
senatecontracts = fread("senate_milcontracts.csv")


EA$dumb1 = ifelse(is.na(EA$ndaa),0,EA$ndaa) + ifelse(is.na(EA$milcom),0,EA$milcom) + ifelse(is.na(EA$sponsored_votes),0,log(EA$sponsored_votes+1))+
  ifelse(is.na(EA$attend),0,EA$attend ) + ifelse(is.na(EA$milCR), 0, EA$milCR)

EA$thick = ifelse(is.na(EA$ndaa),0,EA$ndaa) + ifelse(is.na(EA$milcom),0,EA$milcom)+ ifelse(is.na(EA$sponsored_votes),0,log(1+EA$sponsored_votes))+ EA$milcaucus +ifelse(is.na(EA$dod_contact),0,log(EA$dod_contact+1))+ ifelse(is.na(EA$attend),0,EA$attend ) + ifelse(is.na(EA$milCR), 0, EA$milCR)
 
EA$DEI=EA$dumb1

EA$district_code[EA$icpsr==195 & EA$year>1962 & EA$year<1965 & EA$state_abbrev=="AL"]=3 # Error correction- this got entered as 98

```

#Table 1: Summary Statistics
- change df name

```{r }
# changing the names to allow dplyr to create variables split by underscores
EA$sponsored.votes= EA$sponsored_votes
EA$dod.contact = EA$dod_contact

df <- tbl_df(EA)

df.sum <- EA %>%
  select(milcom, sponsored.votes, ndaa, milcaucus, attend, dod.contact) %>% # select variables to summarise
  summarise_each(funs(min = min(., na.rm=T), 
                      q25 = quantile(., 0.25, na.rm=T), 
                      median = median(., na.rm=T), 
                      q75 = quantile(., 0.75, na.rm=T), 
                      max = max(., na.rm=T),
                      mean = mean(., na.rm=T), 
                      sd = sd(., na.rm=T)))

# the result is a wide data frame, reshape it using tidyr functions

df.stats.tidy <- df.sum %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_") %>%
  spread(stat, val) %>%
  select(var, min, q25, median, q75, max, mean, sd) %>% # reorder columns
  mutate_if(is.numeric, round, 2)


df.stats.tidy$var<- c("Pct committee attendance", "Number of DOD Contacts in a year","Military Caucus Chair","Military Committee","NDAA","Number Sponsored Defense Votes")
print(df.stats.tidy)
colnames(df.stats.tidy)<- c("Variable","Min","Q25","Median","Q75","Max","Med.","SD")
df.stats.tidy$Range <- c("1947-2018","2007-2020","2000-2018","1935-2018","1935-2018","1935-2018")

a = kable(df.stats.tidy) %>%
  kable_styling()

save_kable(a, file="Output/SummaryStats.pdf")
```

#Figure 3- NDEI  comparison to DW-Nominate


```{r }

a = ggplot(EA[!is.na(EA$milcom),], aes(x = nominate_dim1, y = DEI, color = factor(milcom)))+geom_point(alpha = 0.4)+
  theme_bw()+
  xlab("Nominate First Dimension")+
  ylab("National Defense Engagement Index")+
  scale_color_manual(name = "Military Committee", labels = c("Not on Mil Committee", "Mil. Committee Member"), values = c("lightpink1","blue4"))

ggsave(a, file="Output/DEI-Nominate Scatter.pdf")

```



#Figure 4: NDEI density plot
- need to change DF name

```{r}
# Density plot
ggplot(avg_EA[avg_EA$party_code==100 | avg_EA$party_code==200,], aes(x = dumb1, color = factor(party_code)))+
  theme_bw()+
  geom_density()+
  xlab("National Defense Engagement Index")+
  scale_color_manual("Party",labels = c("Democratic","Republican"),values = c("blue","grey"))

ggsave("Output/DEI-Density.png")


# Adding specific examples- I manually add these to the density plot that is output above.
# Add Bernie Sanders & McCain Labels
avg_EA[avg_EA$bioname=="SANDERS, Bernard",] # index = 1.16
avg_EA[avg_EA$bioname=="McCAIN, John Sidney, III",] # index = = 5.72
# Joe Courtney = 4.61

```

#Figure 5: Avg NDEI through time

```{r}


yearly_avgs = EA %>% group_by(year) %>% summarize(defscore = mean(dumb1, na.rm=T))

ggplot(yearly_avgs[yearly_avgs$year<2019,], aes(x=year, y=defscore))+
  annotate("rect", xmin = 1940.5, xmax = 1945.5, ymin = 0, ymax = Inf,
        alpha = .2, fill="lightblue")+
  annotate("text", x=1942 ,y=1, label="WWII" )+
  annotate("rect", xmin = 1949.5, xmax = 1953.5, ymin = 0, ymax = Inf,
        alpha = .2, fill="lightblue")+
  annotate("text", x=1952 ,y=0.9, label="Korea" )+
  annotate("rect", xmin = 1963.5, xmax = 1975.5, ymin = 0, ymax = Inf,
        alpha = .2, fill="lightblue")+ #vietnam
  annotate("text", x=1970 ,y=0.25, label="Vietnam" )+
  annotate("rect", xmin = 1990.5, xmax = 1991.5, ymin = 0, ymax = Inf,
        alpha = .2, fill="lightblue")+ #vietnam
  annotate("text", x=1990.5 ,y=0.25, label="Gulf War" )+
  annotate("rect", xmin = 2002.5, xmax = 2011.5, ymin = 0, ymax = Inf,
        alpha = .2, fill="lightblue")+ #vietnam
  annotate("text", x= 2007,y=0.25, label="Iraq War")+
    geom_vline(xintercept= 2001+9/12)+ #9/11
  annotate("text", x=1999.5 ,y=0.5, label="9/11" )+
  geom_point()+
  theme_bw()+    
  ylab("Average National Defense Engagement Score")+
  xlab("Year")+
  ggtitle("Average NDEI Score")+
  theme(plot.title = element_text(hjust = 0.5))

ggsave(path = "Output", filename = "DEI-through-time.png")

```


#Table 2: Veterans and Defense engagement

```{r}

EA$Republican= ifelse(EA$party_code==200,1,0)

EAreg1 = felm(log(dumb1+0.0001) ~ veteran |0|0| icpsr+year, data= EA[EA$year<2019,])

EAreg2 = felm(log(dumb1+0.0001) ~ veteran +Republican+ chamber|0|0| icpsr+year, data= EA[EA$year<2019,])

EAreg_robustness = felm(log(dumb1+0.0001) ~ veteran +Republican+ chamber|0|0| icpsr+year, data= EA[EA$year<1997,])# robustness check

# Creating a test where I drop Committee from the DEI and see if veteran still matters
EA$DEI_nocom= ifelse(is.na(EA$ndaa),0,EA$ndaa) + ifelse(is.na(EA$sponsored_votes),0,log(EA$sponsored_votes+1))+ ifelse(is.na(EA$milCR), 0, EA$milCR)

EAreg3 = felm(log(DEI_nocom+0.0001) ~ veteran +Republican+ chamber|0|0| icpsr+year, data= EA[EA$year<2019,])


stargazer(EAreg1, EAreg2,EAreg_robustness, EAreg3, covariate.labels = c("Veteran","Republican","Senate"),
          dep.var.labels   = c("log(NDEI)", "log(NDEI)(no Committees)"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes        = "* = $p<0.5$ ; ** = $p<0.01$ ; *** = $p<0.001$", 
          notes.append = FALSE)

```

#Table 3: Military contract Awards and Engagement

```{r}

### ADDING IN CONTRACT DATA
# need to merge in icpsr for states from the ideology df
test = merge(EA, Ideology[!duplicated(Ideology[,c("state_icpsr","state_abbrev")],),c("state_icpsr","state_abbrev")], by= "state_abbrev", all.x=T)
EA = test

# SENATE
EA_sen = EA[EA$chamber=="Senate",]
senatecontracts= senatecontracts[which(senatecontracts$year>1998 & senatecontracts$year<2021),]

# creating lags
senatecontracts = senatecontracts %>%
  group_by(state_code) %>%
  mutate(lag_amount = lag(amount, order_by = year),
         lead_amount = lead(amount, order_by = year),
         lag_num = lag(number, order_by = year),
         lead_num = lead(number, order_by = year))

test1= merge(EA_sen, senatecontracts[,c('state_code','year','amount','number','lag_amount','lead_amount','lag_num','lead_num')], by.x=c("state_icpsr", "year"), by.y=c("state_code", "year") )

#HOUSE
EA_house = EA[EA$chamber=="House",]
housecontracts= housecontracts[which(housecontracts$year>1998 & housecontracts$year<2021),]
housecontracts$dist = as.numeric(housecontracts$dist)

housecontracts = housecontracts %>%
  group_by(state_code, dist) %>%
  mutate(lag_amount = lag(amount, order_by = year),
         lead_amount = lead(amount, order_by = year),
         lag_num = lag(number, order_by = year),
         lead_num = lead(number, order_by = year))

test2= merge(EA_house, housecontracts[,c('state_code','year', 'dist','amount','number', 'lag_amount','lead_amount','lag_num','lead_num')], by.x=c("state_icpsr",'district_code', "year"), by.y=c("state_code", "dist","year"), all.x=T )

EA2 = rbind(test1, test2)

# CLEANING THIS UP
# year coverage is from 1999 to 2020
EA2 = EA2[which(EA2$year>1998 & EA2$year<2021),]

EA2$amount_no_z= ifelse(EA2$amount>0,EA2$amount,0) # some amounts are below 0 because of refunds. I'm going to zero these out
# many ammounts are NA since there wasn't any data on that district-year. Assume these are missing from the data because they're zero
EA2$amount_zeros = ifelse(!is.na(EA2$amount_no_z),EA2$amount_no_z,0)

# same with lags
EA2$amount_no_l= ifelse(EA2$lag_amount>0,EA2$lag_amount,0) # some amounts are below 0 because of refunds. I'm going to zero these out
# many ammounts are NA since there wasn't any data on that district-year. Assume these are missing from the data because they're zero
EA2$amount_zeros_l = ifelse(!is.na(EA2$amount_no_l),EA2$amount_no_l,0)


#Regressions need to be split out by chamber, since senate will necessarily be larger and also senate DEIs are bigger in general


#___Senate
sen_base = felm(log(dumb1+0.0001) ~ log(amount_zeros_l+1) |0|0|state_abbrev, data = EA2[EA2$chamber=="Senate",])
summary(sen_base, robust=T)

sen = felm(log(dumb1+0.0001) ~ log(amount_zeros_l+1) + Republican+ veteran |0|0|state_abbrev, data = EA2[EA2$chamber=="Senate",])
summary(sen, robust=T)

# House
house_base = felm(log(dumb1+0.0001) ~ log(amount_zeros_l+1)|0|0|state_abbrev, data = EA2[EA2$chamber=="House",])
summary(house_base, robust=T)

house = felm(log(dumb1+0.0001) ~ log(amount_zeros_l+1) + Republican+ veteran |0|0|state_abbrev, data = EA2[EA2$chamber=="House",])
summary(house, robust=T)


#_____________________________TABLE_____________________________
stargazer(sen_base, sen, house_base, house, covariate.labels = c("Military Contracts","Republican","Veteran"),
          dep.var.labels   = c("log(NDEI)"),
          column.labels = c("Senate","House"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes        = ". = $p<0.5$ ; * = $p<0.01$ ; ** = $p<0.001$", 
          notes.append = FALSE)


```

# Table 4: BRAC check

NEED TO UPLOAD NEW FILE
- fix DEI cum house
- re-input data into submission
```{r}

# NEED TO LOAD SEPARATE COMPLETE DF HERE

DEI_cum_house = felm(log(DEI+0.001)~ cum_closures +year|icpsr|0| icpsr, data=df)
summary(DEI_cum_house)


DEI_cum_senate = felm(log(DEI+0.001)~ cum_closures+year |icpsr|0| icpsr, data= df_senate)
summary(DEI_cum_senate)

stargazer(DEI_cum_house,DEI_cum_senate,
           column.labels=c("House","Senate"),
          covariate.labels = c("Cumulative Base Closure","Year"),
          dep.var.labels = "Log Defense Engagement  (NDEI)", 
          add.lines = list(c("Member FE?","Yes", "Yes"))
          )


```



#______________________APPENDIX REPLICATION

# Figure 1


# Figure 2

# Figure 3


# Seniority

#Tables 2-4

# Figure 5: Party 1960 position Change

#Figure 6


#Figure 7: Buck McKeon

#Figure 8: John Towers

# Figure 9: Earmarks Correlation

